{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "27bc87b7",
   "metadata": {},
   "source": [
    "# Neo4j Graph Store"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "032264ce",
   "metadata": {},
   "outputs": [],
   "source": [
    "# For OpenAI\n",
    "\n",
    "import os\n",
    "\n",
    "os.environ[\"OPENAI_API_KEY\"] = \"API_KEY_HERE\"\n",
    "\n",
    "import logging\n",
    "import sys\n",
    "from llama_index.llms import OpenAI\n",
    "from llama_index import ServiceContext\n",
    "\n",
    "logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n",
    "\n",
    "# define LLM\n",
    "llm = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
    "service_context = ServiceContext.from_defaults(llm=llm, chunk_size=512)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6fd36e3b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# For Azure OpenAI\n",
    "import os\n",
    "import json\n",
    "import openai\n",
    "from langchain.llms import AzureOpenAI\n",
    "from langchain.embeddings import OpenAIEmbeddings\n",
    "from llama_index import LangchainEmbedding\n",
    "from llama_index import (\n",
    "    VectorStoreIndex,\n",
    "    SimpleDirectoryReader,\n",
    "    KnowledgeGraphIndex,\n",
    "    LLMPredictor,\n",
    "    ServiceContext,\n",
    ")\n",
    "\n",
    "import logging\n",
    "import sys\n",
    "\n",
    "from IPython.display import Markdown, display\n",
    "\n",
    "logging.basicConfig(\n",
    "    stream=sys.stdout, level=logging.INFO\n",
    ")  # logging.DEBUG for more verbose output\n",
    "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))\n",
    "\n",
    "openai.api_type = \"azure\"\n",
    "openai.api_base = \"https://<foo-bar>.openai.azure.com\"\n",
    "openai.api_version = \"2022-12-01\"\n",
    "os.environ[\"OPENAI_API_KEY\"] = \"<your-openai-key>\"\n",
    "openai.api_key = os.getenv(\"OPENAI_API_KEY\")\n",
    "\n",
    "llm = AzureOpenAI(\n",
    "    deployment_name=\"<foo-bar-deployment>\",\n",
    "    temperature=0,\n",
    "    openai_api_version=openai.api_version,\n",
    "    model_kwargs={\n",
    "        \"api_key\": openai.api_key,\n",
    "        \"api_base\": openai.api_base,\n",
    "        \"api_type\": openai.api_type,\n",
    "        \"api_version\": openai.api_version,\n",
    "    },\n",
    ")\n",
    "llm_predictor = LLMPredictor(llm=llm)\n",
    "\n",
    "# You need to deploy your own embedding model as well as your own chat completion model\n",
    "embedding_llm = LangchainEmbedding(\n",
    "    OpenAIEmbeddings(\n",
    "        model=\"text-embedding-ada-002\",\n",
    "        deployment=\"<foo-bar-deployment>\",\n",
    "        openai_api_key=openai.api_key,\n",
    "        openai_api_base=openai.api_base,\n",
    "        openai_api_type=openai.api_type,\n",
    "        openai_api_version=openai.api_version,\n",
    "    ),\n",
    "    embed_batch_size=1,\n",
    ")\n",
    "\n",
    "service_context = ServiceContext.from_defaults(\n",
    "    llm_predictor=llm_predictor,\n",
    "    embed_model=embedding_llm,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "be3f7baa-1c0a-430b-981b-83ddca9e71f2",
   "metadata": {
    "tags": []
   },
   "source": [
    "## Using Knowledge Graph with Neo4jGraphStore"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "75f1d565-04e8-41bc-9165-166dc89b6b47",
   "metadata": {},
   "source": [
    "#### Building the Knowledge Graph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "8d0b2364-4806-4656-81e7-3f6e4b910b5b",
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index import (\n",
    "    KnowledgeGraphIndex,\n",
    "    LLMPredictor,\n",
    "    ServiceContext,\n",
    "    SimpleDirectoryReader,\n",
    ")\n",
    "from llama_index.storage.storage_context import StorageContext\n",
    "from llama_index.graph_stores import Neo4jGraphStore\n",
    "\n",
    "\n",
    "from llama_index.llms import OpenAI\n",
    "from IPython.display import Markdown, display"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "1c297fd3-3424-41d8-9d0d-25fe6310ab62",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "documents = SimpleDirectoryReader(\n",
    "    \"../../../../examples/paul_graham_essay/data\"\n",
    ").load_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "61679142-7595-492b-8792-26cbc439caf8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# define LLM\n",
    "llm = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
    "service_context = ServiceContext.from_defaults(llm=llm, chunk_size=512)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "832b4970",
   "metadata": {},
   "source": [
    "## Prepare for Neo4j"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "7270af8b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: neo4j in /home/tomaz/anaconda3/envs/snakes/lib/python3.9/site-packages (5.11.0)\n",
      "Requirement already satisfied: pytz in /home/tomaz/anaconda3/envs/snakes/lib/python3.9/site-packages (from neo4j) (2023.3)\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "%pip install neo4j\n",
    "\n",
    "username = \"neo4j\"\n",
    "password = \"retractor-knot-thermocouples\"\n",
    "url = \"bolt://44.211.44.239:7687\"\n",
    "database = \"neo4j\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f0edbc99",
   "metadata": {},
   "source": [
    "## Instantiate Neo4jGraph KG Indexes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "370fd08f-56ff-4c24-b0c4-c93116a6d482",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "graph_store = Neo4jGraphStore(\n",
    "    username=username,\n",
    "    password=password,\n",
    "    url=url,\n",
    "    database=database,\n",
    ")\n",
    "\n",
    "storage_context = StorageContext.from_defaults(graph_store=graph_store)\n",
    "\n",
    "# NOTE: can take a while!\n",
    "index = KnowledgeGraphIndex.from_documents(\n",
    "    documents,\n",
    "    storage_context=storage_context,\n",
    "    max_triplets_per_chunk=2,\n",
    "    service_context=service_context,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c39a0eeb-ef16-4982-8ba8-b37c2c5f4437",
   "metadata": {},
   "source": [
    "#### Querying the Knowledge Graph\n",
    "\n",
    "First, we can query and send only the triplets to the LLM."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "670300d8-d0a8-4201-bbcd-4a74b199fcdd",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:llama_index.indices.knowledge_graph.retriever:> Starting query: Tell me more about Interleaf\n",
      "INFO:llama_index.indices.knowledge_graph.retriever:> Query keywords: ['Interleaf']\n",
      "ERROR:llama_index.indices.knowledge_graph.retriever:Index was not constructed with embeddings, skipping embedding usage...\n",
      "INFO:llama_index.indices.knowledge_graph.retriever:> Extracted relationships: The following are knowledge sequence in max depth 2 in the form of `subject [predicate, object, predicate_next_hop, object_next_hop ...]`\n",
      "Interleaf ['IS_ABOUT', 'what not to do']\n",
      "Interleaf ['ADDED', 'scripting language']\n",
      "Interleaf ['MADE', 'software for creating documents']\n"
     ]
    }
   ],
   "source": [
    "query_engine = index.as_query_engine(include_text=False, response_mode=\"tree_summarize\")\n",
    "\n",
    "response = query_engine.query(\"Tell me more about Interleaf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "eecf2d57-3efa-4b0d-941a-95438d42893c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "<b>Interleaf is a subject that is related to \"what not to do\" and \"scripting language\". It is also associated with the predicates \"ADDED\" and \"MADE\", with the objects being \"scripting language\" and \"software for creating documents\" respectively.</b>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display(Markdown(f\"<b>{response}</b>\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ecd32b8e",
   "metadata": {},
   "source": [
    "For more detailed answers, we can also send the text from where the retrieved tripets were extracted."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "bd14686d-1c53-4637-9340-3745f2121ae2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:llama_index.indices.knowledge_graph.retriever:> Starting query: Tell me more about what the author worked on at Interleaf\n",
      "INFO:llama_index.indices.knowledge_graph.retriever:> Query keywords: ['Interleaf', 'worked', 'author']\n",
      "ERROR:llama_index.indices.knowledge_graph.retriever:Index was not constructed with embeddings, skipping embedding usage...\n",
      "INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: c3fd9444-6c20-4cdc-9598-8f0e9ed0b85d: each student had. But the Accademia wasn't teaching me anything except Italia...\n",
      "INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: f4bfad23-0cde-4425-99f9-9229ca0a5cc5: learned some useful things at Interleaf, though they were mostly about what n...\n",
      "INFO:llama_index.indices.knowledge_graph.retriever:> Extracted relationships: The following are knowledge sequence in max depth 2 in the form of `subject [predicate, object, predicate_next_hop, object_next_hop ...]`\n",
      "Interleaf ['IS_ABOUT', 'what not to do']\n",
      "Interleaf ['ADDED', 'scripting language']\n",
      "Interleaf ['MADE', 'software for creating documents']\n"
     ]
    }
   ],
   "source": [
    "query_engine = index.as_query_engine(include_text=True, response_mode=\"tree_summarize\")\n",
    "response = query_engine.query(\n",
    "    \"Tell me more about what the author worked on at Interleaf\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "b4c87d14-d2d8-4d80-89f6-1e5972973528",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "<b>At Interleaf, the author worked on software for creating documents. The company had added a scripting language, inspired by Emacs, and the author was hired as a Lisp hacker to write things in it. However, the author admits to being a bad employee and not fully understanding the software, as it was primarily written in C. Despite this, the author was paid well and managed to save enough money to go back to RISD and pay off their college loans. The author also learned some valuable lessons at Interleaf, particularly about what not to do in technology companies.</b>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display(Markdown(f\"<b>{response}</b>\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ecc7342a",
   "metadata": {},
   "source": [
    "#### Query with embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "b20f9da1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Clean dataset first\n",
    "graph_store.query(\n",
    "    \"\"\"\n",
    "MATCH (n) DETACH DELETE n\n",
    "\"\"\"\n",
    ")\n",
    "\n",
    "# NOTE: can take a while!\n",
    "index = KnowledgeGraphIndex.from_documents(\n",
    "    documents,\n",
    "    storage_context=storage_context,\n",
    "    max_triplets_per_chunk=2,\n",
    "    service_context=service_context,\n",
    "    include_embeddings=True,\n",
    ")\n",
    "\n",
    "query_engine = index.as_query_engine(\n",
    "    include_text=True,\n",
    "    response_mode=\"tree_summarize\",\n",
    "    embedding_mode=\"hybrid\",\n",
    "    similarity_top_k=5,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "01b74b2a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:llama_index.indices.knowledge_graph.retriever:> Starting query: Tell me more about what the author worked on at Interleaf\n",
      "INFO:llama_index.indices.knowledge_graph.retriever:> Query keywords: ['Interleaf', 'worked', 'author']\n",
      "INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: e0067958-8b62-4186-b78c-a07281531e40: each student had. But the Accademia wasn't teaching me anything except Italia...\n",
      "INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: 38459cd5-bc20-428d-a2db-9dc2e716bd15: learned some useful things at Interleaf, though they were mostly about what n...\n",
      "INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: 6be24830-85d5-49d1-8caa-d297cd0e8b14: It had been so long since I'd painted anything that I'd half forgotten why I ...\n",
      "INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: 2ec81827-d6d5-470d-8851-b97b8d8d80b4: Robert Morris showed it to me when I visited him in Cambridge, where he was n...\n",
      "INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: 46b8b977-4176-4622-8d4d-ee3ab16132b4: in decent shape at painting and drawing from the RISD foundation that summer,...\n",
      "INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: 71363c09-ec6b-47c8-86ac-e18be46f1cc2: as scare-quotes. At the time this bothered me, but now it seems amusingly acc...\n",
      "INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: 2dded283-d876-4014-8352-056fccace896: of my old life. Idelle was in New York at least, and there were other people ...\n",
      "INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: de937aec-ebee-4348-9f23-c94d0a5d7436: and I had a lot of time to think on those flights. On one of them I realized ...\n",
      "INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: 33936f7a-0f89-48c7-af9a-171372b4b4b0: What I Worked On\n",
      "\n",
      "February 2021\n",
      "\n",
      "Before college the two main things I worked ...\n",
      "INFO:llama_index.indices.knowledge_graph.retriever:> Extracted relationships: The following are knowledge sequence in max depth 2 in the form of `subject [predicate, object, predicate_next_hop, object_next_hop ...]`\n",
      "('Interleaf', 'made', 'software for creating documents')\n",
      "Interleaf ['MADE', 'software for creating documents']\n",
      "('Interleaf', 'added', 'scripting language')\n",
      "('Interleaf', 'is about', 'what not to do')\n",
      "Interleaf ['ADDED', 'scripting language']\n",
      "Interleaf ['IS_ABOUT', 'what not to do']\n",
      "('I', 'worked on', 'programming')\n",
      "('I', 'worked on', 'writing')\n"
     ]
    }
   ],
   "source": [
    "# query using top 3 triplets plus keywords (duplicate triplets are removed)\n",
    "response = query_engine.query(\n",
    "    \"Tell me more about what the author worked on at Interleaf\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "02084f6d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "<b>At Interleaf, the author worked on writing scripts in a Lisp dialect for the company's software, which was used for creating documents.</b>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display(Markdown(f\"<b>{response}</b>\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "40b97044-d212-4151-bd72-6ea2cff35a29",
   "metadata": {},
   "source": [
    "#### [Optional] Try building the graph and manually add triplets!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "f9de2ddb-4e82-438b-ba3a-b7680efed944",
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.node_parser import SimpleNodeParser"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "137176d9-1bc2-4203-8379-7b285cd41546",
   "metadata": {},
   "outputs": [],
   "source": [
    "node_parser = SimpleNodeParser.from_defaults()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "dc609c08-6fce-444c-84cd-a305fcad6bcd",
   "metadata": {},
   "outputs": [],
   "source": [
    "nodes = node_parser.get_nodes_from_documents(documents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "21c3ad61-6f2a-4176-96ba-6e9f52d6243d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# initialize an empty index for now\n",
    "index = KnowledgeGraphIndex.from_documents([], storage_context=storage_context)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "41e03f7e-bb98-4fe0-9fc0-369be2864a00",
   "metadata": {},
   "outputs": [],
   "source": [
    "# add keyword mappings and nodes manually\n",
    "# add triplets (subject, relationship, object)\n",
    "\n",
    "# for node 0\n",
    "node_0_tups = [\n",
    "    (\"author\", \"worked on\", \"writing\"),\n",
    "    (\"author\", \"worked on\", \"programming\"),\n",
    "]\n",
    "for tup in node_0_tups:\n",
    "    index.upsert_triplet_and_node(tup, nodes[0])\n",
    "\n",
    "# for node 1\n",
    "node_1_tups = [\n",
    "    (\"Interleaf\", \"made software for\", \"creating documents\"),\n",
    "    (\"Interleaf\", \"added\", \"scripting language\"),\n",
    "    (\"software\", \"generate\", \"web sites\"),\n",
    "]\n",
    "for tup in node_1_tups:\n",
    "    index.upsert_triplet_and_node(tup, nodes[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "48b1a666-2f84-4524-851a-66efd2beb611",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:llama_index.indices.knowledge_graph.retriever:> Starting query: Tell me more about Interleaf\n",
      "INFO:llama_index.indices.knowledge_graph.retriever:> Query keywords: ['Solutions', 'Interleaf', 'Software', 'Information', 'Technology']\n",
      "ERROR:llama_index.indices.knowledge_graph.retriever:Index was not constructed with embeddings, skipping embedding usage...\n",
      "INFO:llama_index.indices.knowledge_graph.retriever:> Extracted relationships: The following are knowledge sequence in max depth 2 in the form of `subject [predicate, object, predicate_next_hop, object_next_hop ...]`\n",
      "Interleaf ['MADE_SOFTWARE_FOR', 'creating documents']\n",
      "Interleaf ['IS_ABOUT', 'what not to do']\n",
      "Interleaf ['ADDED', 'scripting language']\n",
      "Interleaf ['MADE', 'software for creating documents']\n"
     ]
    }
   ],
   "source": [
    "query_engine = index.as_query_engine(include_text=False, response_mode=\"tree_summarize\")\n",
    "\n",
    "response = query_engine.query(\"Tell me more about Interleaf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "fb4b99d7-452f-4594-94e9-da10a3a23fb8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "<b>\n",
       "Interleaf is a software company that specializes in creating documents. It has added a scripting language to its software to make it easier for users to create documents. It also provides advice on what not to do when creating documents.</b>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display(Markdown(f\"<b>{response}</b>\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6945e03b",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "firstEnv",
   "language": "python",
   "name": "firstenv"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.17"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
